# Scratch example: first- plus second-order links on a 4-node toy network
# (undirected edges 1-2, 1-3 and 3-4). It mirrors, at small scale, the
# "A + A %*% A, then zero the diagonal" construction used further down.
#
# Matrix is attached here because these lines run before the library() calls
# further down in the file; without it Matrix()/sparseMatrix() are not found.
library(Matrix)

# --- Variant 1: adjacency matrix typed in directly (filled column-wise) ---
A <- Matrix(
  c(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0),
  nrow = 4, ncol = 4, sparse = TRUE
)
A

# Entry (p, q) of A2 is the number of two-step paths p -> x -> q.
A2 <- A %*% A
A2

# Direct links plus two-step links; the diagonal (p -> x -> p) is removed.
out1 <- A + A %*% A
out1
diag(out1) <- 0
out1

# Alternative: only keep off-diagonal two-step links with exactly one path.
out <- A + ((A2 - diag(diag(A2))) == 1)
out



# --- Variant 2: adjacency matrix built from an edge list ---
a <- c(1, 1, 3)
b <- c(2, 3, 4)
links <- cbind(a, b)
links

# Each edge is entered in both directions to make the matrix symmetric.
A <- sparseMatrix(i = links[, 1], j = links[, 2], dims = c(4, 4)) +
  sparseMatrix(i = links[, 2], j = links[, 1], dims = c(4, 4))
A
A2 <- A %*% A
A2

# Off-diagonal two-step links with exactly one path; Diagonal() keeps the
# subtraction sparse instead of going through a dense diag() matrix.
A3 <- (A2 - Diagonal(x = diag(A2))) == 1

A4 <- A + A %*% A
diag(A4) <- 0
A4

# Row/column positions of all linked pairs.
which(A4 != 0, arr.ind = TRUE)







library(foreign)
library(lattice)
library(Matrix)

# Remove every non-hidden object from the current workspace (this includes the
# toy-example objects created at the top of the file) before the real run.
# NOTE(review): this also wipes unrelated objects in an interactive session;
# confirm that a full reset is intended rather than removing specific objects.
rm(list = ls(all.names = FALSE))


### Part 1

# Directory that holds every .dta file read or written in Part 1.
data_dir <- "J:/Workdata/704351/Amalie/Networks/JOP Publication Files/Data/"

# Full path of the year-specific file "<stem> <year>.dta" inside data_dir.
year_file <- function(stem, year) {
  paste0(data_dir, stem, " ", year, ".dta")
}

# Sparse n x n matrix with an entry for every row of `pairs` (two columns of
# node indices), entered in both directions so the result is symmetric.
symmetric_links <- function(pairs, n) {
  sparseMatrix(i = pairs[, 1], j = pairs[, 2], dims = c(n, n)) +
    sparseMatrix(i = pairs[, 2], j = pairs[, 1], dims = c(n, n))
}

# Person-by-person links through shared groups.
# `person` and `group` are parallel index vectors (rows with NA are dropped).
# Group indices are shifted by n_persons so persons and groups become distinct
# nodes of one graph; squaring its adjacency matrix gives, in the top-left
# n_persons x n_persons corner, the number of two-step paths
# person -> group -> person. The diagonal (a person with itself) is set to 0.
shared_group_links <- function(person, group, n_persons) {
  membership <- na.omit(cbind(person, group + n_persons))
  n_nodes <- max(membership[, 2])
  two_mode <- symmetric_links(membership, n_nodes)
  keep <- seq_len(n_persons)
  projected <- (two_mode %*% two_mode)[keep, keep]
  diag(projected) <- 0
  projected
}

# Row/column positions of the non-zero entries of `mat`, as a data frame.
nonzero_pairs <- function(mat) {
  as.data.frame(which(mat != 0, arr.ind = TRUE))
}

# The survey-member file does not depend on the year, so it is read once.
# Its first column is used as row index into the person-level network matrix.
survey <- read.dta(paste0(data_dir, "Survey Members.dta"))
survey_ids <- survey[, 1]

for (year in 2010:2013) {

  den <- read.dta(year_file("Only Unique ID", year))

  # Largest person index; all person-level matrices are n_persons x n_persons.
  n_persons <- max(den$unique)


  ##### PARTNERS

  partner_mat <- symmetric_links(
    na.omit(cbind(den$unique, den$unique_partner)),
    n_persons
  )


  ##### SIBLING AND PARENTS

  family_mat <- symmetric_links(
    na.omit(cbind(
      c(den$unique, den$unique),
      c(den$unique_mom, den$unique_dad)
    )),
    n_persons
  )

  # Add two-step family links (e.g. two children of the same parent) to the
  # direct child-parent links, then remove links from a person to itself.
  family_mat <- family_mat + family_mat %*% family_mat
  diag(family_mat) <- 0


  ##### EDUCATION: all students from graduation cohort

  educ <- read.dta(year_file("Unique Education-cohort", year))
  educ_mat <- shared_group_links(educ$unique, educ$educ_id, n_persons)
  rm(educ)


  ##### FIRMS: all coworkers for firms with fewer than 25 employees,
  ##### firm-education group for larger firms, 2008-2011

  coworker <- read.dta(year_file("Unique Firm-year", year))
  coworker_mat <- shared_group_links(
    coworker$unique, coworker$unique_firm_year, n_persons
  )
  rm(coworker)


  ##### FINAL NETWORK MATRIX

  network_mat <- partner_mat + family_mat + educ_mat + coworker_mat

  partner_pairs <- nonzero_pairs(partner_mat)
  family_pairs <- nonzero_pairs(family_mat)
  educ_pairs <- nonzero_pairs(educ_mat)
  coworker_pairs <- nonzero_pairs(coworker_mat)

  write.dta(partner_pairs, year_file("partner_pairs", year))
  write.dta(family_pairs, year_file("family_pairs", year))
  write.dta(educ_pairs, year_file("educ_pairs", year))
  write.dta(coworker_pairs, year_file("coworker_pairs", year))


  ##### FIRST STAGE (i-j links)

  # Restrict the rows to survey members. Row k of the result belongs to
  # survey_ids[k]; drop = FALSE keeps a matrix even for a single member.
  network_mat_survey <- Matrix(
    network_mat[survey_ids, , drop = FALSE],
    sparse = TRUE
  )

  # Collapse to 2 columns with all links to panel dataset members
  network_pairs_FS <- nonzero_pairs(network_mat_survey)

  write.dta(network_pairs_FS, year_file("network_pairs_FS", year))


  ##### REDUCED FORM (i-k links)

  # Free the large intermediate objects before the matrix product below.
  rm(
    den, partner_mat, family_mat, educ_mat, coworker_mat,
    partner_pairs, family_pairs, educ_pairs, coworker_pairs,
    network_pairs_FS
  )

  network_mat <- Matrix(network_mat, sparse = TRUE)

  # Entry (k, m): number of two-step paths survey member k -> j -> person m.
  # The result has one row per survey member and one column per person, so it
  # is generally NOT square.
  network_mat <- network_mat_survey %*% network_mat

  instrument_pairs_RF <- nonzero_pairs(network_mat)

  # Drop round trips i -> j -> i. Row k stands for person survey_ids[k], so the
  # self-link of row k sits in column survey_ids[k], not in column k; zeroing
  # the diagonal of this non-square matrix would hit the wrong cells unless
  # survey_ids happened to be exactly 1, 2, ..., n in order.
  self_link <- survey_ids[instrument_pairs_RF[[1]]] == instrument_pairs_RF[[2]]
  instrument_pairs_RF <- instrument_pairs_RF[!self_link, , drop = FALSE]
  rownames(instrument_pairs_RF) <- NULL

  write.dta(instrument_pairs_RF, year_file("instrument_pairs_RF", year))

}

### Go back to Stata



### Part 2

### Construct full i-j-k dataset. Run this after j-dataset has been created ###

# Construct network matrix again

# Directory that holds every .dta file read or written in Part 2.
data_dir <- "J:/Workdata/704351/Amalie/Networks/JOP Publication Files/Data/"

# Full path of the year-specific file "<stem> <year>.dta" inside data_dir.
year_file <- function(stem, year) {
  paste0(data_dir, stem, " ", year, ".dta")
}

# Sparse n x n matrix with an entry for every row of `pairs` (two columns of
# node indices), entered in both directions so the result is symmetric.
symmetric_links <- function(pairs, n) {
  sparseMatrix(i = pairs[, 1], j = pairs[, 2], dims = c(n, n)) +
    sparseMatrix(i = pairs[, 2], j = pairs[, 1], dims = c(n, n))
}

# Person-by-person links through shared groups.
# `person` and `group` are parallel index vectors (rows with NA are dropped).
# Group indices are shifted by n_persons so persons and groups become distinct
# nodes of one graph; squaring its adjacency matrix gives, in the top-left
# n_persons x n_persons corner, the number of two-step paths
# person -> group -> person. The diagonal (a person with itself) is set to 0.
shared_group_links <- function(person, group, n_persons) {
  membership <- na.omit(cbind(person, group + n_persons))
  n_nodes <- max(membership[, 2])
  two_mode <- symmetric_links(membership, n_nodes)
  keep <- seq_len(n_persons)
  projected <- (two_mode %*% two_mode)[keep, keep]
  diag(projected) <- 0
  projected
}

# The j-member file does not depend on the year, so it is read once.
# Its first column is used as row index into the person-level network matrix.
survey_j <- read.dta(paste0(data_dir, "J of Survey Members.dta"))
survey_j_ids <- survey_j[, 1]

for (year in 2010:2013) {

  den <- read.dta(year_file("Only Unique ID", year))

  # Largest person index; all person-level matrices are n_persons x n_persons.
  n_persons <- max(den$unique)


  ##### PARTNERS

  partner_mat <- symmetric_links(
    na.omit(cbind(den$unique, den$unique_partner)),
    n_persons
  )


  ##### SIBLING AND PARENTS

  family_mat <- symmetric_links(
    na.omit(cbind(
      c(den$unique, den$unique),
      c(den$unique_mom, den$unique_dad)
    )),
    n_persons
  )

  # Add two-step family links (e.g. two children of the same parent) to the
  # direct child-parent links, then remove links from a person to itself.
  family_mat <- family_mat + family_mat %*% family_mat
  diag(family_mat) <- 0


  ##### EDUCATION: all students from graduation cohort

  educ <- read.dta(year_file("Unique Education-cohort", year))
  educ_mat <- shared_group_links(educ$unique, educ$educ_id, n_persons)
  rm(educ)


  ##### FIRMS: all coworkers for firms with fewer than 25 employees,
  ##### firm-education group for larger firms, 2008-2011

  coworker <- read.dta(year_file("Unique Firm-year", year))
  coworker_mat <- shared_group_links(
    coworker$unique, coworker$unique_firm_year, n_persons
  )
  rm(coworker)


  ##### FINAL NETWORK MATRIX

  network_mat <- partner_mat + family_mat + educ_mat + coworker_mat


  ##### j-k links to be able to map full network chain (i to j to k)

  # Restrict the rows to the j dataset cases. Row k of the result belongs to
  # survey_j_ids[k]; drop = FALSE keeps a matrix even for a single j member
  # (otherwise the row would collapse to a vector and be re-read as a column).
  network_mat_survey_j <- Matrix(
    network_mat[survey_j_ids, , drop = FALSE],
    sparse = TRUE
  )

  # Collapse to 2 columns with all links to j members
  network_pairs_jk <- as.data.frame(
    which(network_mat_survey_j != 0, arr.ind = TRUE)
  )

  write.dta(network_pairs_jk, year_file("network_pairs_jk", year))

}







